from scrapy.linkextractors import LinkExtractor
from scrapy.spiders import CrawlSpider, Rule

from scrapy_05_duShu.items import ReadBookItem


class ReadSpider(CrawlSpider):
    """Crawl the paginated ``/book/1188_<n>.html`` listings on www.dushu.com.

    Every response whose URL matches the pagination pattern in ``rules`` is
    passed to :meth:`parse_item`, which yields one ``ReadBookItem`` per book
    entry found on that page.
    """

    name = "duShu"
    allowed_domains = ["www.dushu.com"]
    start_urls = ["https://www.dushu.com/book/1188_1.html"]

    # follow=True: links extracted from each matched page are themselves
    # followed, so every pagination page gets crawled. With follow=False only
    # the pages linked directly from the start page would be downloaded.
    rules = (
        Rule(
            LinkExtractor(allow=r"/book/1188_\d+\.html"),
            callback="parse_item",
            follow=True,
        ),
    )

    def parse_item(self, response):
        """Yield a ``ReadBookItem`` for each book entry on a listing page.

        For every ``div.book-info`` nested under ``div.bookslist`` the item is
        filled with:

        * ``name``   - the ``alt`` attribute of the first ``<img>``
        * ``author`` - the first text node of the first ``<p>``
        * ``img``    - the ``data-original`` attribute of the first ``<img>``

        A field is ``None`` when its XPath matches nothing.
        """
        book_nodes = response.xpath(
            '//div[@class="bookslist"]//div[@class="book-info"]'
        )
        for book in book_nodes:
            yield ReadBookItem(
                name=book.xpath('.//img/@alt').get(),
                author=book.xpath('.//p[1]/text()').get(),
                img=book.xpath('.//img/@data-original').get(),
            )
